library(tidyverse)
library(lubridate)
#import data
library(readr)
library(plotly)
# Raw movement logs, one CSV per day. The paths are hard-coded and
# home-relative, so they must exist on the machine running the script.
movement_files <- c(
  fri = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Fri.csv",
  sat = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Sat.csv",
  sun = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Sun.csv"
)
move_fri <- read_csv(movement_files[["fri"]])
move_sat <- read_csv(movement_files[["sat"]])
move_sun <- read_csv(movement_files[["sun"]])
# Friday: parse the timestamps, then give every record its visitor's latest
# (exit_time) and earliest (entry_time) Timestamp.
# Only the records taken at one of those two moments are kept, which shrinks
# the table to (normally) two rows per id. The result stays grouped by id.
time_move_fri <- move_fri %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  group_by(id) %>%
  mutate(
    exit_time = max(Timestamp),
    entry_time = min(Timestamp)
  ) %>%
  filter(Timestamp %in% c(entry_time, exit_time))
# Length of each visitor's stay on Friday, in hours.
# `exit_time - entry_time` lets R pick the difftime unit from the data, so the
# bare number left after as.numeric() has no fixed unit. difftime() with
# units = "hours" pins it, the same way the Saturday and Sunday code does.
# Rounding to 6 decimals lets visitors with the same stay share one value.
duration_fri <- time_move_fri %>%
  group_by(id) %>%
  mutate(
    duration = difftime(exit_time, entry_time, units = "hours"),
    duration = round(as.numeric(duration), digits = 6)
  )
# Group size on Friday: how many visitors share exactly the same duration.
# Distinct ids are counted instead of halving the row count, so the result
# does not depend on every visitor contributing exactly two rows (a visitor
# with one row, or with repeated records at the entry/exit moment, would
# otherwise skew n()/2).
group_size_fri <- duration_fri %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Friday arrivals: one row per visitor, entry time rounded to the nearest
# 10 minutes, then the number of visitors in each 10-minute slot.
# ungroup() drops the per-id grouping so count() tallies by time slot only.
entry_fri <- time_move_fri %>%
  distinct(id, entry_time) %>%
  ungroup() %>%
  count(entry_time = round_date(entry_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Friday arrivals per 10-minute slot.
entry_p_fri <- ggplot(entry_fri, aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#000066") +
  labs(x = "Entry Time on Friday") +
  # Zoom the y axis to 0-1250 without dropping any data; the Saturday and
  # Sunday entry charts use the same range.
  coord_cartesian(ylim = c(0, 1250)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Friday departures: one row per visitor, exit time rounded to the nearest
# 10 minutes so visitors can be tallied per slot.
# The count column is called num_entry even though it counts exits; the name
# is kept because the exit plot below reads it.
exit_fri <- time_move_fri %>%
  distinct(id, exit_time) %>%
  ungroup() %>%
  count(exit_time = round_date(exit_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Friday departures per 10-minute slot.
# exit_fri stores its counts in a column named num_entry; it is renamed for
# the plot only, so the y axis reads num_exit instead of the misleading
# num_entry while exit_fri itself is left untouched.
exit_p_fri <- exit_fri %>%
  rename(num_exit = num_entry) %>%
  ggplot(aes(x = exit_time, y = num_exit)) +
  geom_line(color = "#000066") +
  xlab("Exit Time on Friday") +
  # Zoom the y axis to 0-750 without dropping any data.
  coord_cartesian(ylim = c(0, 750)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Scatterplot of stay duration against the number of visitors sharing it
# (Friday). ylim() removes points whose group size falls outside 0-45.
group_size_p_fri <- ggplot(
  group_size_fri,
  aes(x = duration, y = group_size)
) +
  geom_point(color = "#000066") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Friday") +
  theme_minimal()
# Saturday: same preparation as Friday. Parse the timestamps, attach each
# visitor's latest (exit_time) and earliest (entry_time) Timestamp, and keep
# only the records taken at one of those two moments.
# The result stays grouped by id.
time_move_sat <- move_sat %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  group_by(id) %>%
  mutate(
    exit_time = max(Timestamp),
    entry_time = min(Timestamp)
  ) %>%
  filter(Timestamp %in% c(entry_time, exit_time))
# Length of each visitor's stay on Saturday: exit minus entry, expressed in
# hours, converted to a plain number and rounded to 6 decimals.
duration_sat <- time_move_sat %>%
  group_by(id) %>%
  mutate(
    duration = round(
      as.numeric(difftime(exit_time, entry_time, units = "hours")),
      digits = 6
    )
  )
# Group size on Saturday: how many visitors share exactly the same duration.
# Distinct ids are counted instead of halving the row count, so the result
# does not depend on every visitor contributing exactly two rows (a visitor
# with one row, or with repeated records at the entry/exit moment, would
# otherwise skew n()/2).
group_size_sat <- duration_sat %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Saturday arrivals: one row per visitor, entry time rounded to the nearest
# 10 minutes, then the number of visitors in each 10-minute slot.
# ungroup() drops the per-id grouping so count() tallies by time slot only.
entry_sat <- time_move_sat %>%
  distinct(id, entry_time) %>%
  ungroup() %>%
  count(entry_time = round_date(entry_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Saturday arrivals per 10-minute slot.
entry_p_sat <- ggplot(entry_sat, aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#339999") +
  labs(x = "Entry Time on Saturday") +
  # Zoom the y axis to 0-1250 without dropping any data; the Friday and
  # Sunday entry charts use the same range.
  coord_cartesian(ylim = c(0, 1250)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Saturday departures: one row per visitor, exit time rounded to the nearest
# 10 minutes, then the number of visitors in each slot.
# The count column is called num_entry even though it counts exits; the name
# is kept because the exit plot below reads it.
exit_sat <- time_move_sat %>%
  distinct(id, exit_time) %>%
  ungroup() %>%
  count(exit_time = round_date(exit_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Saturday departures per 10-minute slot.
# exit_sat stores its counts in a column named num_entry; it is renamed for
# the plot only, so the y axis reads num_exit instead of the misleading
# num_entry while exit_sat itself is left untouched.
exit_p_sat <- exit_sat %>%
  rename(num_exit = num_entry) %>%
  ggplot(aes(x = exit_time, y = num_exit)) +
  geom_line(color = "#339999") +
  xlab("Exit Time on Saturday") +
  # Zoom the y axis to 0-750 without dropping any data.
  coord_cartesian(ylim = c(0, 750)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Scatterplot of stay duration against the number of visitors sharing it
# (Saturday). ylim() removes points whose group size falls outside 0-45.
group_size_p_sat <- ggplot(
  group_size_sat,
  aes(x = duration, y = group_size)
) +
  geom_point(color = "#339999") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Saturday") +
  theme_minimal()
# Sunday: same preparation as Friday and Saturday. Parse the timestamps,
# attach each visitor's latest (exit_time) and earliest (entry_time)
# Timestamp, and keep only the records taken at one of those two moments.
# The result stays grouped by id.
time_move_sun <- move_sun %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  # One Sunday timestamp fails to parse and becomes NA. Left in place, it
  # turns that visitor's max()/min() into NA, which wipes out their entry
  # time, exit time and duration. Drop the unparsed record before grouping.
  filter(!is.na(Timestamp)) %>%
  group_by(id) %>%
  mutate(
    exit_time = max(Timestamp),
    entry_time = min(Timestamp)
  ) %>%
  filter(Timestamp %in% c(exit_time, entry_time))
## Warning: Problem with `mutate()` input `Timestamp`.
## ℹ 1 failed to parse.
## ℹ Input `Timestamp` is `ymd_hms(Timestamp)`.
## Warning: 1 failed to parse.
# Length of each visitor's stay on Sunday: exit minus entry, expressed in
# hours, converted to a plain number and rounded to 6 decimals.
duration_sun <- time_move_sun %>%
  group_by(id) %>%
  mutate(
    duration = round(
      as.numeric(difftime(exit_time, entry_time, units = "hours")),
      digits = 6
    )
  )
# Group size on Sunday: how many visitors share exactly the same duration.
# Distinct ids are counted instead of halving the row count, so the result
# does not depend on every visitor contributing exactly two rows (a visitor
# with one row, or with repeated records at the entry/exit moment, would
# otherwise skew n()/2).
group_size_sun <- duration_sun %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Sunday arrivals: one row per visitor, entry time rounded to the nearest
# 10 minutes, then the number of visitors in each 10-minute slot.
# ungroup() drops the per-id grouping so count() tallies by time slot only.
entry_sun <- time_move_sun %>%
  distinct(id, entry_time) %>%
  ungroup() %>%
  count(entry_time = round_date(entry_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Sunday arrivals per 10-minute slot.
entry_p_sun <- ggplot(entry_sun, aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#FF6600") +
  labs(x = "Entry Time on Sunday") +
  # Zoom the y axis to 0-1250 without dropping any data; the Friday and
  # Saturday entry charts use the same range.
  coord_cartesian(ylim = c(0, 1250)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Sunday departures: one row per visitor, exit time rounded to the nearest
# 10 minutes, then the number of visitors in each slot.
# The count column is called num_entry even though it counts exits; the name
# is kept because the exit plot below reads it.
exit_sun <- time_move_sun %>%
  distinct(id, exit_time) %>%
  ungroup() %>%
  count(exit_time = round_date(exit_time, "10 mins"), name = "num_entry")
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of Sunday departures per 10-minute slot.
# exit_sun stores its counts in a column named num_entry; it is renamed for
# the plot only, so the y axis reads num_exit instead of the misleading
# num_entry while exit_sun itself is left untouched.
exit_p_sun <- exit_sun %>%
  rename(num_exit = num_entry) %>%
  ggplot(aes(x = exit_time, y = num_exit)) +
  geom_line(color = "#FF6600") +
  xlab("Exit Time on Sunday") +
  # Zoom the y axis to 0-750 without dropping any data.
  coord_cartesian(ylim = c(0, 750)) +
  # One tick every two hours, labelled weekday + 12-hour clock + AM/PM.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  theme_minimal()
# Scatterplot of stay duration against the number of visitors sharing it
# (Sunday). ylim() removes points whose group size falls outside 0-45.
group_size_p_sun <- ggplot(
  group_size_sun,
  aes(x = duration, y = group_size)
) +
  geom_point(color = "#FF6600") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Sunday") +
  theme_minimal()
library(htmltools)
# Convert the three entry-time charts to plotly widgets and emit them
# together as one tag list.
entry_plot_fri <- entry_p_fri %>% ggplotly()
entry_plot_sat <- entry_p_sat %>% ggplotly()
entry_plot_sun <- entry_p_sun %>% ggplotly()
tagList(
  entry_plot_fri,
  entry_plot_sat,
  entry_plot_sun
)
# Convert the three exit-time charts to plotly widgets and emit them
# together as one tag list.
exit_plot_fri <- exit_p_fri %>% ggplotly()
exit_plot_sat <- exit_p_sat %>% ggplotly()
exit_plot_sun <- exit_p_sun %>% ggplotly()
tagList(
  exit_plot_fri,
  exit_plot_sat,
  exit_plot_sun
)
# Convert the three duration-vs-group-size scatterplots to plotly widgets and
# emit them together as one tag list.
group_plotly_fri <- group_size_p_fri %>% ggplotly()
group_plotly_sat <- group_size_p_sat %>% ggplotly()
group_plotly_sun <- group_size_p_sun %>% ggplotly()
tagList(
  group_plotly_fri,
  group_plotly_sat,
  group_plotly_sun
)
# To check that visitors sharing a duration really are a group, pick one
# duration and inspect the movement of the visitors who have it.
# Here: Saturday visitors whose stay was 13.493611 hours.
# near() compares with a small tolerance instead of exact floating-point
# equality; durations are rounded to 6 decimals, so the tolerance cannot
# match a neighbouring value.
group_3_list <- duration_sat %>%
  filter(near(duration, 13.493611))
# Saturday visitors whose stay was 13.36 hours.
# near() compares with a small tolerance instead of exact floating-point
# equality; durations are rounded to 6 decimals, so the tolerance cannot
# match a neighbouring value.
group_14_list <- duration_sat %>%
  filter(near(duration, 13.360000))
# Saturday positions of the visitors in group_3_list, one panel per id.
# geom_count() sizes each point by how many records fall on that (X, Y).
group_3_move <- move_sat %>%
  filter(id %in% group_3_list$id) %>%
  ggplot(aes(x = X, y = Y)) +
  geom_count(alpha = 0.5, color = "#339999") +
  facet_wrap(~ as.factor(id)) +
  labs(title = "Individual Movement in a Group of 3 on Saturday") +
  theme_minimal()
# Display the plot.
group_3_move
# Saturday positions of the visitors in group_14_list, one panel per id,
# drawn as plain semi-transparent points.
group_14_move <- move_sat %>%
  filter(id %in% group_14_list$id) %>%
  ggplot(aes(x = X, y = Y)) +
  geom_point(alpha = 0.5, color = "#339999") +
  facet_wrap(~ as.factor(id)) +
  labs(title = "Individual Movement in a Group of 14 on Saturday") +
  theme_minimal()
# Display the plot.
group_14_move
# Citations:
# Time difference: https://stackoverflow.com/questions/30510044/how-to-make-time-difference-in-same-units-when-subtracting-posixct
# Jordan’s demo on time series: https://jcrouser.github.io/CSC235/time-demo.html
# Arranging plotly objects: https://plotly-r.com/arranging-views.html#arranging-plotly-objects
# Color choices: https://stackoverflow.com/questions/26195231/ggplot2-manually-specifying-colour-with-geom-line
# ggplot2 geom_count in plotly: https://plotly.com/ggplot2/geom_count/